#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Load packages
library(tidyverse)
library(tidylog)
library(here)
library(modelsummary)
library(kableExtra)

#Function to format latex tables
source(here("code", "fun", "fix_txt.R"))
#Custom function
`%notin%` <- Negate(`%in%`)

#Load data on population
g <- read.csv(here("data", "input", "nhgis", "nhgis0013_ts_nominal_county.csv"))
#Setup FIPS codes
g$STATEFP <- str_pad(g$STATEFP, 2, "left", "0")
g$COUNTYFP <- str_pad(g$COUNTYFP, 3, "left", "0")
g$fips <- as.numeric(paste0(g$STATEFP, g$COUNTYFP))
#Adjust names
names(g) <- tolower(names(g))
g <- g %>% rename(pop = av0aa)
#Select main variables
g <- subset(g, select = c(year, fips, pop))
# Load matched data
elec <- readRDS(here("data", "output", "matched_df.rds"))
#Subset to relevant variables
g <- subset(g, fips %in% elec$fips)

#1) that even with earlier population decline, still voting Democratic
elec %>%
  group_by(treat, year) %>%
  summarize(Republican = mean(RepVotes), Democrat = mean(DemVotes)) %>%
  pivot_longer(cols = c(Republican, Democrat)) %>%
  ggplot() +
  geom_point(aes(x = year, y = value, color = name), size = 3) +
  facet_wrap(~ treat) +
  theme_bw() +
  scale_x_continuous(breaks = seq(1972, 2020, 4)) +
  labs(y = "Total Votes by Party", x = "Year")

#2) even with later population decline, the change in votes exceeds what the population outflow would indicate
g.wide <- g %>%
  mutate(year = paste0("x", year)) %>%
  pivot_wider(names_from = year, values_from = pop)
g.wide$chg70to00 <- with(g.wide, x2000 - x1970)
hist(g.wide$chg70to00)

elec.treat <- subset(elec, select = c(treat, fips))
elec.treat <- unique(elec.treat)

g.long <- left_join(g, elec.treat, by = "fips")

g.long <- g.long %>%
  group_by(fips) %>%
  mutate(popchg = pop - lag(pop))

g.long %>%
  filter(treat == 1) %>%
  ggplot(aes(x = year, y = pop)) +
  geom_violin(aes(group = year)) +
  stat_smooth(se = FALSE) +
  theme_bw(base_size = 13) +
  labs(x = "Year", y = "Population") +
  scale_x_continuous(breaks = seq(1970, 2020, 10))

g.long %>%
  filter(treat == 1) %>%
  group_by(year) %>%
  ggplot() +
  geom_hline(yintercept = 0, color = "grey") +
  geom_boxplot(aes(x = year, y = popchg, group = year)) +
  theme_bw(base_size = 13) +
  labs(x = "Census Year", y = "Population Change from Prior Census") +
  theme(panel.grid = element_blank())

# group elections into census-year bins
elec.census <-
  elec %>%
  mutate(
    censusyear = case_when(
      year %in% c(1972, 1976) ~ 1970,
      year %in% c(1980, 1984) ~ 1980,
      year %in% c(1988, 1992) ~ 1990,
      year %in% c(1996, 2000) ~ 2000,
      year %in% c(2004, 2008, 2012) ~ 2010,
      year %in% c(2014, 2016, 2020) ~ 2020
    )
  ) %>%
  group_by(censusyear, fips) %>%
  summarise(
    treat = median(treat),
    RepVotes = mean(RepVotes),
    DemVotes = mean(DemVotes),
    TotalVotes = mean(TotalVotes)
  ) %>%
  group_by(fips) %>%
  mutate(
    RepVotesChg = RepVotes - lag(RepVotes),
    DemVotesChg = DemVotes - lag(DemVotes),
    TotalVotesChg = TotalVotes - lag(TotalVotes)
  ) %>%
  rename(year = censusyear)

elec.census <- left_join(elec.census, g.long, by = c("fips", "year", "treat"))

elec.census <- elec.census %>%
  mutate(
    dem_pop_dif = DemVotesChg - popchg,
    rep_pop_dif = RepVotesChg - popchg,
    total_pop_dif = TotalVotesChg - popchg
  )

elec.census <- elec.census %>%
  mutate(decrease = ifelse(TotalVotesChg < 0 & popchg < 0 & total_pop_dif > 0, 1, 0),
         decreasedem = ifelse(DemVotesChg < 0 & popchg < 0 & dem_pop_dif > 0, 1, 0))

# How many have had decreases in total votes and population?
elec.census %>%
  group_by(treat) %>%
  summarise(
    n = n(),
    decrease = sum(decrease, na.rm = TRUE)
  ) %>%
  mutate(prop = decrease / n)

elec.census %>%
  group_by(treat) %>%
  summarise(
    n = n(),
    decreasedem = sum(decreasedem, na.rm = TRUE)
  ) %>%
  mutate(prop = decreasedem / n)

# Subset to those that have had decreases in total votes and population
d <- subset(elec.census, decrease == 1 & treat == 1)
hist(d$total_pop_dif)
##Positive values mean that the change in population is greater than the change in total votes
d$lb <- d$popchg * -1
d$ub <- 0
d$mid <- (-1 * d$popchg) * .5
d$mid25 <- (-1 * d$popchg) * .25
d$mid75 <- (-1 * d$popchg) * .75

d <- subset(d, select = c(fips, year, lb, ub, mid, mid25, mid75))

ct <- left_join(elec.census, d, by = c("fips", "year"))
ct <- ct %>% mutate(across(lb:mid75, ~ ifelse(is.na(.x), 0, .x)))

#Base
ct$gopshare <- with(ct, RepVotes / TotalVotes) * 100
#If all population loss is Republicans
ct$gopshare_ub <- with(ct, (RepVotes + lb) / (TotalVotes + lb)) * 100
#If 25% of population loss is Democrats
ct$gopshare_mid25 <- with(ct, (RepVotes + mid25) / (TotalVotes + mid25 + mid75)) * 100
#If 50% of population loss is Democrats
ct$gopshare_mid <- with(ct, (RepVotes + mid) / (TotalVotes + mid * 2)) * 100
#If 75% of population loss is Democrats
ct$gopshare_mid75 <- with(ct, (RepVotes + mid75) / (TotalVotes + mid25 + mid75)) * 100
#If 100% of population loss is democrats
ct$gopshare_lb <- with(ct, RepVotes / (TotalVotes + lb)) * 100

# Estimate simple DID under counterfactuals
m1 <- list()
m1[["Base"]] <- lm(gopshare ~ treat * I(year>=2010), ct)
m1[["0\\%"]] <- lm(gopshare_ub ~ treat * I(year>=2010), ct)
m1[["25\\%"]] <- lm(gopshare_mid75 ~ treat * I(year>=2010), ct)
m1[["50\\%"]] <- lm(gopshare_mid ~ treat * I(year>=2010), ct)
m1[["75\\%"]] <- lm(gopshare_mid25 ~ treat * I(year>=2010), ct)
m1[["100\\%"]] <- lm(gopshare_lb ~ treat * I(year>=2010), ct)

# Now do this again but with Democratic voters instead
# Subset to those that have had decreases in total votes and population
d2 <- subset(elec.census, decreasedem == 1 & treat == 1)
hist(d2$dem_pop_dif)
##Positive values mean that the change in population is greater than the change in total votes
d2$lb <- d2$popchg * -1
d2$ub <- 0
d2$mid <- (-1 * d2$popchg) * .5
d2$mid25 <- (-1 * d2$popchg) * .25
d2$mid75 <- (-1 * d2$popchg) * .75

d2 <- subset(d2, select = c(fips, year, lb, ub, mid, mid25, mid75))

ct2 <- left_join(elec.census, d2, by = c("fips", "year"))
ct2 <- ct2 %>% mutate(across(lb:mid75, ~ ifelse(is.na(.x), 0, .x)))
#Base
ct2$gopshare <- with(ct2, RepVotes / TotalVotes) * 100
#If all population loss is Republicans
ct2$gopshare_ub <- with(ct2, (RepVotes + lb) / (TotalVotes + lb)) * 100
#If 25% of population loss is Democrats
ct2$gopshare_mid25 <- with(ct2, (RepVotes + mid25) / (TotalVotes + mid25 + mid75)) * 100
#If 50% of population loss is Democrats
ct2$gopshare_mid <- with(ct2, (RepVotes + mid) / (TotalVotes + mid * 2)) * 100
#If 75% of population loss is Democrats
ct2$gopshare_mid75 <- with(ct2, (RepVotes + mid75) / (TotalVotes + mid25 + mid75)) * 100
#If 100% of population loss is democrats
ct2$gopshare_lb <- with(ct2, RepVotes / (TotalVotes + lb)) * 100

# Estimate simple DID under counterfactuals
m2 <- list()
m2[["Base"]] <- lm(gopshare ~ treat * I(year>=2010), ct2)
m2[["0\\%"]] <- lm(gopshare_ub ~ treat * I(year>=2010), ct2)
m2[["25\\%"]] <- lm(gopshare_mid75 ~ treat * I(year>=2010), ct2)
m2[["50\\%"]] <- lm(gopshare_mid ~ treat * I(year>=2010), ct2)
m2[["75\\%"]] <- lm(gopshare_mid25 ~ treat * I(year>=2010), ct2)
m2[["100\\%"]] <- lm(gopshare_lb ~ treat * I(year>=2010), ct2)

# Create table
panels <- list(
  "Decline in Total Votes and Population" = m1,
  "Decline in Democratic Votes and Population" = m2
)
#Format table
gm <- gof_map
gm$omit <- TRUE
gm$omit[gm$raw=="nobs"] <- FALSE
gm$clean[gm$raw=="nobs"] <- "$N$"

## Table I1 ----
file_tab <- here("output", "tables", "si_tab_I1_comp_bias.txt")
modelsummary(
  panels,
  shape = "rbind",
  vcov = "HC2",
  fmt = 2,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = c(
    "treat:I(year >= 2010)TRUE"="ATT"
  ),
  gof_map = gm,
  output = "latex",
  escape = FALSE
) %>%
  add_header_above(c(" "=2,"Democratic Share of Population Loss $(\\\\gamma)$"=5), escape = FALSE) %>%
  cat(., file = file_tab)
fix_txt(file_tab)

# Figure I1----
fig.pop <- elec.census %>%
  filter(treat == 1) %>%
  ggplot() +
  geom_hline(yintercept = 0, color = "grey") +
  geom_boxplot(aes(x = year, y = popchg, group = year)) +
  theme_bw(base_size = 13) +
  scale_y_continuous(limits = c(-30000,30000)) +
  theme(panel.grid = element_blank()) +
  labs(y = "Population Change from Prior Census", x = "Year")
ggsave(
  fig.pop,
  file = here("output", "figures", "si_fig_I1_popchg.png"),
  width = 6,
  height = 2.8, 
  dpi = 300,
  scale = 1.5
)
